Project #03: Visualizing and Maintaining the Green Canopy of NYC
Introduction
This project will explore the NYC TreeMap data set and creating visualizations of NYC’s urban fabric. Based on these visualizations, I will propose a new program for the NYC Parks Department that attempts to make the benefits of NYC trees available to all New Yorkers.
Data Acquisition
Retrieve boundaries of districts
Code
library(sf)library(tidyverse)library(dplyr)library(httr2)suppressPackageStartupMessages(library(tidyverse))highlight <-function(x) {paste0('<span style="color:#e41a1c;"><b>', x, "</b></span>")}# Create data directory if it doesn't existif (!dir.exists("data/mp03")) {dir.create("data/mp03", recursive =TRUE, showWarnings =FALSE)cat("✓ Created data/mp03 directory\n")}get_council_districts <-function() {cat("\n=== Downloading NYC Council District Boundaries ===\n")# Use ArcGIS Hub - NYC Planning's official data portal api_url <-"https://services5.arcgis.com/GfwWNkhOj9bNBqoJ/arcgis/rest/services/NYC_City_Council_Districts/FeatureServer/0/query" geojson_file <-"data/mp03/council_districts_arcgis.geojson"# Download only if file doesn't exist (responsible downloading)if (!file.exists(geojson_file)) {cat("Downloading from ArcGIS Hub...\n") resp <-request(api_url) %>%req_url_query(where ="1=1", # Get all recordsoutFields ="*", # All fieldsreturnGeometry ="true", # Include geometriesf ="geojson"# GeoJSON format ) %>%req_headers(`User-Agent`="Mozilla/5.0 (Educational Project)") %>%req_perform()writeBin(resp_body_raw(resp), geojson_file)cat("✓ Downloaded GeoJSON file\n") } else {cat("✓ GeoJSON file already exists\n") }# Read GeoJSON districts <-st_read(geojson_file, quiet =TRUE)# Verify geometries are validif (all(st_is_empty(districts))) {stop("ERROR: Downloaded data has empty geometries!") }# Transform to WGS84 coordinate system (as required) districts <-st_transform(districts, crs ="WGS84")return(districts)}districts <-get_council_districts()
=== Downloading NYC Council District Boundaries ===
✓ GeoJSON file already exists
Download Tree Points
Code
library(sf)library(dplyr)library(httr2)library(fs)get_tree_points <-function(limit =50000) {cat("\n=== Downloading NYC Tree Points ===\n")# Base URL for the NYC OpenData Tree Points API (Socrata) base_url <-"https://data.cityofnewyork.us/resource/hn5i-inap.geojson"# Initialize pagination variables offset <-0 all_files <-list() page_num <-1# Page through entire dataset using $limit and $offset parametersrepeat { filename <-file.path("data/mp03", sprintf("trees_page_%03d.geojson", page_num))# Check if file already exists (avoid re-downloading)if (file.exists(filename)) {cat(sprintf("✓ Page %d already downloaded\n", page_num)) all_files[[page_num]] <- filename# Check if we got fewer results (indicates end of dataset) test_data <-st_read(filename, quiet =TRUE)if (nrow(test_data) < limit) {cat("✓ Reached end of dataset\n")break } offset <- offset + limit page_num <- page_num +1next }# Download this page using httr2cat(sprintf("Downloading page %d (offset: %d)...\n", page_num, offset)) resp <-request(base_url) %>%req_url_query(`$limit`= limit, `$offset`= offset) %>%req_headers(`User-Agent`="Mozilla/5.0 (Educational Project)") %>%req_retry(max_tries =3) %>%req_perform()# Save response to filewriteBin(resp_body_raw(resp), filename) all_files[[page_num]] <- filenamecat(sprintf("✓ Saved page %d\n", page_num))# Read to check how many records we got current_data <-st_read(filename, quiet =TRUE) n_records <-nrow(current_data)cat(sprintf(" Retrieved %d records\n", n_records))# If we got fewer than limit, we've reached the endif (n_records < limit) {cat("✓ Reached end of dataset\n")break }# Update for next iteration offset <- offset + limit page_num <- page_num +1# Be polite - add a small delay between requestsSys.sleep(0.5) }# Read and combine all GeoJSON filescat("\nCombining all tree point files...\n") tree_list <-lapply(all_files, function(f) {st_read(f, quiet =TRUE) }) trees <-bind_rows(tree_list)cat("✓ Successfully loaded", format(nrow(trees), big.mark =","), "trees\n")return(trees)}trees <-get_tree_points()
Here I’m joining the districts and trees together to do more analysis. This includes which district has the most trees, highest density, highest fraction of dead trees, and most common tree species. I will be using interactive data visualizations for this as well.
Code
library(sf)library(dplyr)library(gt)# Join tree points onto district polygonstrees_districts <-st_join(trees, districts, join = st_intersects, left =TRUE)# Check that expected columns existif (!"genusspecies"%in%names(trees_districts)) { trees_districts$genusspecies <-NA# add empty column if missing}if (!"CounDist"%in%names(trees_districts)) { trees_districts$CounDist <-NA# add empty column if missing}# Summarize tree counts per district (numeric CounDist)trees_per_district <- trees_districts %>%st_set_geometry(NULL) %>%# drop geometry for tablegroup_by(CounDist) %>%summarise(Trees =n(), .groups ="drop") %>%arrange(desc(Trees))# Take only the top districttop_district <- trees_per_district %>%slice_head(n =1)
1. Which council district has the most trees?
Code
# Make a table and highlight District 51top_district %>%gt() %>%tab_style(style =cell_fill(color ="lightblue"),locations =cells_body(columns =c("CounDist"), rows = CounDist ==51) ) %>%cols_label(CounDist ="Council District",Trees ="Number of Trees" ) %>%tab_header(title ="NYC District with the Most Trees",subtitle =paste("Total trees in this district:", top_district$Trees) )
NYC District with the Most Trees
Total trees in this district: 70928
Council District
Number of Trees
51
70928
NoteKey Finding
District 51 is the one with the most trees.
2. Which council district has the highest density of trees?
Code
# Add Shape_Area to your summarized table# 2. Summarize tree counts per districttrees_density <- trees_per_district %>%left_join( districts %>%st_set_geometry(NULL) %>%select(CounDist, Shape__Area), by ="CounDist" ) %>%mutate(Tree_Density = Trees / Shape__Area) %>%arrange(desc(Tree_Density)) # See the top district by tree density trees_density %>% slice_head(n = 2)trees_density %>%slice_head(n =3) %>%# top 3 by densityselect(CounDist, Tree_Density) %>%gt() %>%tab_header(title ="Top 3 NYC Districts with Highest Tree Density" ) %>%cols_label(CounDist ="Council District",Tree_Density ="Tree Density" ) %>%fmt_number(columns ="Tree_Density",decimals =4# adjust decimals as needed ) %>%tab_style(style =cell_fill(color ="lightblue"),locations =cells_body(columns =everything(),rows = CounDist %in%c(7, 39) ) )
Top 3 NYC Districts with Highest Tree Density
Council District
Tree Density
7
0.0003
39
0.0003
2
0.0002
NoteKey Finding
The council district with the highest tree density is 7 and 39.
3. Which district has highest fraction of dead trees out of all trees?
Code
# Summarize dead fraction per districtdead_fraction <- trees_districts %>%st_set_geometry(NULL) %>%# drop geometrygroup_by(CounDist) %>%# group by district as charactersummarise(Total_Trees =n(),Dead_Trees =sum(tpcondition =="Dead", na.rm =TRUE),Dead_Fraction = Dead_Trees / Total_Trees ) %>%arrange(desc(Dead_Trees))# Get top 5 districts by dead fractiontop_districts <- dead_fraction %>%slice_head(n =5)# Identify the top district for highlightingtop_district <- top_districts$CounDist[1]# Create the tablelibrary(gt)top_districts %>%select(CounDist, Total_Trees, Dead_Trees, Dead_Fraction) %>%gt() %>%cols_label(CounDist ="Council District",Total_Trees ="Total Trees",Dead_Trees ="Dead Trees",Dead_Fraction ="Dead Fraction" ) %>%fmt_number(columns =c(Dead_Fraction),decimals =2 ) %>%tab_header(title ="Top 5 NYC Council Districts by Dead Tree Fraction" ) %>%tab_style(style =cell_fill(color ="lightblue"),locations =cells_body(rows = CounDist == top_district ) )
Top 5 NYC Council Districts by Dead Tree Fraction
Council District
Total Trees
Dead Trees
Dead Fraction
51
70928
9147
0.13
50
52438
7041
0.13
19
49832
6335
0.13
23
44807
5828
0.13
49
35027
4569
0.13
NoteKey Finding
The district with the highest fraction of dead trees is 51
4. What is the most common tree species in Manhattan?
Code
# Add a borough column (you already did this)trees_districts <- trees_districts %>%mutate(Borough =case_when( CounDist >=1& CounDist <=10~"Manhattan", CounDist >=11& CounDist <=18~"Bronx", CounDist >=19& CounDist <=32~"Queens", CounDist >=33& CounDist <=48~"Brooklyn", CounDist >=49& CounDist <=51~"Staten Island",TRUE~"Other" ) )# Create most_common_manhattan tablemost_common_manhattan <- trees_districts %>%filter(Borough =="Manhattan") %>%st_set_geometry(NULL) %>%# drop geometry for tablegroup_by(genusspecies) %>%summarise(n =n()) %>%arrange(desc(n)) %>%slice_head(n =1) # only top species# Now display as a gt tablemost_common_manhattan %>%gt() %>%cols_label(genusspecies ="Tree Species",n ="Number of Trees" ) %>%tab_header(title ="Most Common Tree Species in Manhattan" )
Most Common Tree Species in Manhattan
Tree Species
Number of Trees
Gleditsia triacanthos var. inermis - Thornless honeylocust
17311
NoteKey Finding
The species in manhattan with most trees is Gleditsia triacanthos var. inermis - Thornless honeylocust.
5. What is the species of the tree closest to Baruch’s campus?
Code
library(units)library(sf)library(dplyr)# Function to create a pointnew_st_point <-function(lat, lon) {st_sfc(st_point(c(lon, lat))) |># longitude firstst_set_crs("WGS84")}# Baruch College coordinatesbaruch_point <-new_st_point(lat =40.7401, lon =-73.9832)# Project both trees and Baruch point to a planar CRS for distance in meterstrees_proj <-st_transform(trees, 2263)baruch_proj <-st_transform(baruch_point, 2263)# Compute distances in meterstrees_distance <- trees_proj %>%mutate(distance_m =as.numeric(st_distance(geometry, baruch_proj)))# Find closest treeclosest_tree <- trees_distance %>%slice_min(distance_m, n =1)
NoteKey Finding
The tree closest to Baruch College is a Quercus acutissima - sawtooth oak, located approximately 100.5 meters away.
Code
library(leaflet)library(sf)library(leaflet)library(dplyr)# Ensure trees is an sf object (with geometry column)class(trees) # should include "sf"
[1] "sf" "data.frame"
Code
# If not, convert using your geometry column:# trees <- st_as_sf(trees, coords = c("longitude", "latitude"), crs = 4326)# Ensure closest_tree keeps geometry, do NOT drop it# closest_tree <- trees_distance %>% slice_min(distance_m, n = 1)# Transform everything to WGS84 for Leaflettrees_wgs <-st_transform(trees, 4326)baruch_wgs <-st_transform(baruch_point, 4326)closest_tree_wgs <-st_transform(closest_tree, 4326)# Optional: create a small buffer around Baruch for context (e.g., 100m)buffer_wgs <-st_transform(st_buffer(st_transform(baruch_point, 2263), 100),4326)leaflet() %>%addProviderTiles(providers$OpenStreetMap) %>%addPolygons(data = buffer_wgs, color ="red", weight =2, fill =FALSE, group ="Buffer") %>%addCircleMarkers(data = trees_wgs,radius =2,color ="green",fillOpacity =0.3,group ="All Trees" ) %>%addCircleMarkers(data = closest_tree_wgs,radius =8,color ="darkgreen",fillColor ="darkgreen",fillOpacity =1,popup =paste0("Closest Tree: ", closest_tree_wgs$genusspecies),group ="Closest Tree" ) %>%addMarkers(data = baruch_wgs,popup ="Baruch College",label ="Baruch College" ) %>%addLayersControl(overlayGroups =c("Buffer", "All Trees", "Closest Tree"),options =layersControlOptions(collapsed =FALSE) ) %>%setView(lng =-73.9832, lat =40.7401, zoom =17)